{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports & Settings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:26:22.735641Z",
     "start_time": "2018-12-10T05:26:22.732284Z"
    }
   },
   "outputs": [],
   "source": [
    "from time import time\n",
    "import warnings\n",
    "from collections import Counter\n",
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "from gensim.models import Word2Vec, KeyedVectors\n",
    "from gensim.scripts.glove2word2vec import glove2word2vec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:26:22.809782Z",
     "start_time": "2018-12-10T05:26:22.807930Z"
    }
   },
   "outputs": [],
   "source": [
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:23:31.270587Z",
     "start_time": "2018-12-10T05:23:31.261745Z"
    }
   },
   "outputs": [],
   "source": [
    "analogies_path = Path('data', 'analogies', 'analogies-en.txt')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Convert GloVe vectors to gensim format"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
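    "Pre-trained GloVe vectors are available from the [Stanford NLP GloVe project page](https://nlp.stanford.edu/projects/glove/); the [Wikipedia](http://nlp.stanford.edu/data/glove.6B.zip) version can be downloaded directly. Unzip the archive and store the files in `data/glove`. The Twitter and Common Crawl sections below expect `glove.twitter.27B.200d.txt` and `glove.840B.300d.txt` in the same folder."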
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Wikipedia"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:21:41.349764Z",
     "start_time": "2018-12-10T06:21:41.347458Z"
    }
   },
   "outputs": [],
   "source": [
    "glove_path = Path('data/glove')\n",
    "glove_wiki_file= glove_path / 'glove.6B.300d.txt'\n",
    "word2vec_wiki_file = glove_path / 'glove.wiki.gensim.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:21:44.300116Z",
     "start_time": "2018-12-10T06:21:41.533781Z"
    }
   },
   "outputs": [],
   "source": [
    "# Convert the Wikipedia GloVe file into the word2vec text format.\n",
    "glove2word2vec(\n",
    "    glove_input_file=glove_wiki_file,\n",
    "    word2vec_output_file=word2vec_wiki_file,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Twitter Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:44:52.104643Z",
     "start_time": "2018-12-10T05:44:52.096912Z"
    }
   },
   "outputs": [],
   "source": [
    "# Twitter vectors: raw GloVe input and word2vec-format output.\n",
    "glove_twitter_file = glove_path.joinpath('glove.twitter.27B.200d.txt')\n",
    "word2vec_twitter_file = glove_path.joinpath('glove.twitter.gensim.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:45:02.864556Z",
     "start_time": "2018-12-10T05:44:59.034198Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1193517, 200)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert the Twitter GloVe file into the word2vec text format.\n",
    "glove2word2vec(\n",
    "    glove_input_file=glove_twitter_file,\n",
    "    word2vec_output_file=word2vec_twitter_file,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Common Crawl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:59:20.729441Z",
     "start_time": "2018-12-10T05:59:20.721920Z"
    }
   },
   "outputs": [],
   "source": [
    "# Common Crawl vectors: raw GloVe input and word2vec-format output.\n",
    "glove_crawl_file = glove_path.joinpath('glove.840B.300d.txt')\n",
    "word2vec_crawl_file = glove_path.joinpath('glove.crawl.gensim.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:59:40.404114Z",
     "start_time": "2018-12-10T05:59:28.731439Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2196018, 300)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Convert the Common Crawl GloVe file into the word2vec text format.\n",
    "glove2word2vec(\n",
    "    glove_input_file=glove_crawl_file,\n",
    "    word2vec_output_file=word2vec_crawl_file,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluate embeddings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:21:48.973717Z",
     "start_time": "2018-12-10T06:21:48.965153Z"
    }
   },
   "outputs": [],
   "source": [
    "def eval_analogies(file_name, vocab=30000, analogies=None):\n",
    "    \"\"\"Score a word2vec-format embedding file on a word-analogy question set.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file_name : str or Path\n",
    "        Text (non-binary) word2vec-format vector file to load.\n",
    "    vocab : int\n",
    "        Forwarded to gensim as `restrict_vocab`.\n",
    "    analogies : str or Path, optional\n",
    "        Analogy question file; defaults to the notebook-level `analogies_path`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.DataFrame\n",
    "        One row per section reported by gensim (summary row labelled 'total'),\n",
    "        with columns `category`, `samples` (correct + incorrect) and\n",
    "        `average` (correct / samples).\n",
    "    \"\"\"\n",
    "    questions = str(analogies_path if analogies is None else analogies)\n",
    "    model = KeyedVectors.load_word2vec_format(str(file_name), binary=False)\n",
    "    options = dict(restrict_vocab=vocab, case_insensitive=True)\n",
    "    if hasattr(model, 'accuracy'):\n",
    "        # Call `accuracy` on the KeyedVectors object itself rather than through\n",
    "        # its deprecated `.wv` self-alias.\n",
    "        sections = model.accuracy(questions, **options)\n",
    "    else:\n",
    "        # Newer gensim dropped `accuracy`; its successor returns\n",
    "        # (overall score, sections).\n",
    "        _, sections = model.evaluate_word_analogies(questions, **options)\n",
    "    # `evaluate_word_analogies` names the summary section 'Total accuracy';\n",
    "    # keep the 'total' label that `accuracy` produces.\n",
    "    rows = [['total' if s['section'] == 'Total accuracy' else s['section'],\n",
    "             len(s['correct']),\n",
    "             len(s['incorrect'])] for s in sections]\n",
    "    return (pd.DataFrame(rows, columns=['category', 'correct', 'incorrect'])\n",
    "            .assign(samples=lambda x: x.correct.add(x.incorrect))\n",
    "            .assign(average=lambda x: x.correct.div(x.samples))\n",
    "            .drop(['correct', 'incorrect'], axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:33:06.540388Z",
     "start_time": "2018-12-10T06:28:21.484660Z"
    }
   },
   "outputs": [],
   "source": [
    "result = eval_analogies(word2vec_twitter_file, vocab=100000)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Twitter result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:33:06.559308Z",
     "start_time": "2018-12-10T06:33:06.553450Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>samples</th>\n",
       "      <th>average</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>capital-common-countries</td>\n",
       "      <td>462</td>\n",
       "      <td>0.701299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>capital-world</td>\n",
       "      <td>930</td>\n",
       "      <td>0.690323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>city-in-state</td>\n",
       "      <td>3644</td>\n",
       "      <td>0.350714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>currency</td>\n",
       "      <td>268</td>\n",
       "      <td>0.018657</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>family</td>\n",
       "      <td>342</td>\n",
       "      <td>0.824561</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>gram1-adjective-to-adverb</td>\n",
       "      <td>650</td>\n",
       "      <td>0.143077</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>gram2-opposite</td>\n",
       "      <td>342</td>\n",
       "      <td>0.365497</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>gram3-comparative</td>\n",
       "      <td>1260</td>\n",
       "      <td>0.757937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>gram4-superlative</td>\n",
       "      <td>930</td>\n",
       "      <td>0.686022</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>gram5-present-participle</td>\n",
       "      <td>702</td>\n",
       "      <td>0.750712</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>gram6-nationality-adjective</td>\n",
       "      <td>870</td>\n",
       "      <td>0.750575</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>gram7-past-tense</td>\n",
       "      <td>1190</td>\n",
       "      <td>0.576471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>gram8-plural</td>\n",
       "      <td>1122</td>\n",
       "      <td>0.811052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>gram9-plural-verbs</td>\n",
       "      <td>600</td>\n",
       "      <td>0.655000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>total</td>\n",
       "      <td>13312</td>\n",
       "      <td>0.564228</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       category  samples   average\n",
       "0      capital-common-countries      462  0.701299\n",
       "1                 capital-world      930  0.690323\n",
       "2                 city-in-state     3644  0.350714\n",
       "3                      currency      268  0.018657\n",
       "4                        family      342  0.824561\n",
       "5     gram1-adjective-to-adverb      650  0.143077\n",
       "6                gram2-opposite      342  0.365497\n",
       "7             gram3-comparative     1260  0.757937\n",
       "8             gram4-superlative      930  0.686022\n",
       "9      gram5-present-participle      702  0.750712\n",
       "10  gram6-nationality-adjective      870  0.750575\n",
       "11             gram7-past-tense     1190  0.576471\n",
       "12                 gram8-plural     1122  0.811052\n",
       "13           gram9-plural-verbs      600  0.655000\n",
       "14                        total    13312  0.564228"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Wikipedia result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:28:21.483713Z",
     "start_time": "2018-12-10T06:28:21.477881Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>samples</th>\n",
       "      <th>average</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>capital-common-countries</td>\n",
       "      <td>506</td>\n",
       "      <td>0.948617</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>capital-world</td>\n",
       "      <td>8372</td>\n",
       "      <td>0.964644</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>city-in-state</td>\n",
       "      <td>4242</td>\n",
       "      <td>0.599953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>currency</td>\n",
       "      <td>752</td>\n",
       "      <td>0.174202</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>family</td>\n",
       "      <td>506</td>\n",
       "      <td>0.881423</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>gram1-adjective-to-adverb</td>\n",
       "      <td>992</td>\n",
       "      <td>0.225806</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>gram2-opposite</td>\n",
       "      <td>756</td>\n",
       "      <td>0.285714</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>gram3-comparative</td>\n",
       "      <td>1332</td>\n",
       "      <td>0.882132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>gram4-superlative</td>\n",
       "      <td>1056</td>\n",
       "      <td>0.746212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>gram5-present-participle</td>\n",
       "      <td>1056</td>\n",
       "      <td>0.699811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>gram6-nationality-adjective</td>\n",
       "      <td>1640</td>\n",
       "      <td>0.925000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>gram7-past-tense</td>\n",
       "      <td>1560</td>\n",
       "      <td>0.611538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>gram8-plural</td>\n",
       "      <td>1332</td>\n",
       "      <td>0.780781</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>gram9-plural-verbs</td>\n",
       "      <td>870</td>\n",
       "      <td>0.585057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>total</td>\n",
       "      <td>24972</td>\n",
       "      <td>0.754445</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       category  samples   average\n",
       "0      capital-common-countries      506  0.948617\n",
       "1                 capital-world     8372  0.964644\n",
       "2                 city-in-state     4242  0.599953\n",
       "3                      currency      752  0.174202\n",
       "4                        family      506  0.881423\n",
       "5     gram1-adjective-to-adverb      992  0.225806\n",
       "6                gram2-opposite      756  0.285714\n",
       "7             gram3-comparative     1332  0.882132\n",
       "8             gram4-superlative     1056  0.746212\n",
       "9      gram5-present-participle     1056  0.699811\n",
       "10  gram6-nationality-adjective     1640  0.925000\n",
       "11             gram7-past-tense     1560  0.611538\n",
       "12                 gram8-plural     1332  0.780781\n",
       "13           gram9-plural-verbs      870  0.585057\n",
       "14                        total    24972  0.754445"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate the Wikipedia vectors here so this cell does not depend on\n",
    "# whichever `result` happened to be computed last.\n",
    "# vocab=100000 mirrors the Twitter run above; the setting behind the saved\n",
    "# output is not recorded - adjust if needed.\n",
    "wiki_result = eval_analogies(word2vec_wiki_file, vocab=100000)\n",
    "wiki_result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Common Crawl result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T06:20:56.028002Z",
     "start_time": "2018-12-10T06:20:56.021706Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>samples</th>\n",
       "      <th>average</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>capital-common-countries</td>\n",
       "      <td>506</td>\n",
       "      <td>0.946640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>capital-world</td>\n",
       "      <td>4290</td>\n",
       "      <td>0.917483</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>city-in-state</td>\n",
       "      <td>4242</td>\n",
       "      <td>0.706742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>currency</td>\n",
       "      <td>206</td>\n",
       "      <td>0.184466</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>family</td>\n",
       "      <td>420</td>\n",
       "      <td>0.978571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>gram1-adjective-to-adverb</td>\n",
       "      <td>992</td>\n",
       "      <td>0.388105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>gram2-opposite</td>\n",
       "      <td>702</td>\n",
       "      <td>0.363248</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>gram3-comparative</td>\n",
       "      <td>1332</td>\n",
       "      <td>0.876877</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>gram4-superlative</td>\n",
       "      <td>1122</td>\n",
       "      <td>0.919786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>gram5-present-participle</td>\n",
       "      <td>1056</td>\n",
       "      <td>0.827652</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>gram6-nationality-adjective</td>\n",
       "      <td>1406</td>\n",
       "      <td>0.948791</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>gram7-past-tense</td>\n",
       "      <td>1560</td>\n",
       "      <td>0.621154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>gram8-plural</td>\n",
       "      <td>1332</td>\n",
       "      <td>0.864114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>gram9-plural-verbs</td>\n",
       "      <td>870</td>\n",
       "      <td>0.672414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>total</td>\n",
       "      <td>20036</td>\n",
       "      <td>0.779347</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       category  samples   average\n",
       "0      capital-common-countries      506  0.946640\n",
       "1                 capital-world     4290  0.917483\n",
       "2                 city-in-state     4242  0.706742\n",
       "3                      currency      206  0.184466\n",
       "4                        family      420  0.978571\n",
       "5     gram1-adjective-to-adverb      992  0.388105\n",
       "6                gram2-opposite      702  0.363248\n",
       "7             gram3-comparative     1332  0.876877\n",
       "8             gram4-superlative     1122  0.919786\n",
       "9      gram5-present-participle     1056  0.827652\n",
       "10  gram6-nationality-adjective     1406  0.948791\n",
       "11             gram7-past-tense     1560  0.621154\n",
       "12                 gram8-plural     1332  0.864114\n",
       "13           gram9-plural-verbs      870  0.672414\n",
       "14                        total    20036  0.779347"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Evaluate the Common Crawl vectors here so this cell does not depend on\n",
    "# whichever `result` happened to be computed last.\n",
    "# vocab=100000 mirrors the Twitter run above; the setting behind the saved\n",
    "# output is not recorded - adjust if needed.\n",
    "crawl_result = eval_analogies(word2vec_crawl_file, vocab=100000)\n",
    "crawl_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:29:37.510823Z",
     "start_time": "2018-12-10T05:29:37.498492Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>correct</th>\n",
       "      <th>incorrect</th>\n",
       "      <th>average</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>capital-common-countries</td>\n",
       "      <td>482</td>\n",
       "      <td>24</td>\n",
       "      <td>0.952569</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>capital-world</td>\n",
       "      <td>6093</td>\n",
       "      <td>227</td>\n",
       "      <td>0.964082</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>city-in-state</td>\n",
       "      <td>2472</td>\n",
       "      <td>1646</td>\n",
       "      <td>0.600291</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>currency</td>\n",
       "      <td>112</td>\n",
       "      <td>390</td>\n",
       "      <td>0.223108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>family</td>\n",
       "      <td>392</td>\n",
       "      <td>28</td>\n",
       "      <td>0.933333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>gram1-adjective-to-adverb</td>\n",
       "      <td>228</td>\n",
       "      <td>764</td>\n",
       "      <td>0.229839</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>gram2-opposite</td>\n",
       "      <td>205</td>\n",
       "      <td>497</td>\n",
       "      <td>0.292023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>gram3-comparative</td>\n",
       "      <td>1175</td>\n",
       "      <td>157</td>\n",
       "      <td>0.882132</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>gram4-superlative</td>\n",
       "      <td>737</td>\n",
       "      <td>193</td>\n",
       "      <td>0.792473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>gram5-present-participle</td>\n",
       "      <td>686</td>\n",
       "      <td>306</td>\n",
       "      <td>0.691532</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>gram6-nationality-adjective</td>\n",
       "      <td>1445</td>\n",
       "      <td>37</td>\n",
       "      <td>0.975034</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>gram7-past-tense</td>\n",
       "      <td>954</td>\n",
       "      <td>606</td>\n",
       "      <td>0.611538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>gram8-plural</td>\n",
       "      <td>1016</td>\n",
       "      <td>244</td>\n",
       "      <td>0.806349</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>gram9-plural-verbs</td>\n",
       "      <td>472</td>\n",
       "      <td>340</td>\n",
       "      <td>0.581281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>total</td>\n",
       "      <td>16469</td>\n",
       "      <td>5459</td>\n",
       "      <td>0.751049</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                       category  correct  incorrect   average\n",
       "0      capital-common-countries      482         24  0.952569\n",
       "1                 capital-world     6093        227  0.964082\n",
       "2                 city-in-state     2472       1646  0.600291\n",
       "3                      currency      112        390  0.223108\n",
       "4                        family      392         28  0.933333\n",
       "5     gram1-adjective-to-adverb      228        764  0.229839\n",
       "6                gram2-opposite      205        497  0.292023\n",
       "7             gram3-comparative     1175        157  0.882132\n",
       "8             gram4-superlative      737        193  0.792473\n",
       "9      gram5-present-participle      686        306  0.691532\n",
       "10  gram6-nationality-adjective     1445         37  0.975034\n",
       "11             gram7-past-tense      954        606  0.611538\n",
       "12                 gram8-plural     1016        244  0.806349\n",
       "13           gram9-plural-verbs      472        340  0.581281\n",
       "14                        total    16469       5459  0.751049"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-12-10T05:29:55.829245Z",
     "start_time": "2018-12-10T05:29:55.822131Z"
    }
   },
   "outputs": [],
   "source": [
    "result.to_csv(glove_path / 'accuracy.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
